(c) 2019, Dr. Ramil Nugmanov; Dr. Timur Madzhidov; Ravil Mukhametgaleev
For installation instructions, package information and the tutorial files, see https://github.com/cimm-kzn/CGRtools
NOTE: The tutorial should be run sequentially from the start. Running cells out of order will lead to unexpected results.
In [ ]:
# Check that the installed CGRtools release is the one this tutorial was
# tested against. Only the major and minor version components are compared.
try:
    # importlib.metadata is the standard-library replacement for the
    # deprecated pkg_resources API and needs no setuptools installation.
    from importlib.metadata import version as _dist_version
except ImportError:
    # Interpreters without importlib.metadata: fall back to pkg_resources.
    import pkg_resources

    def _dist_version(name):
        """Return the installed version string of the distribution *name*."""
        return pkg_resources.get_distribution(name).version

if _dist_version('CGRtools').split('.')[:2] != ['3', '1']:
    print('WARNING. Tutorial was tested on 3.1 version of CGRtools')
else:
    print('Welcome!')
In [ ]:
# Load the prepared objects used in the rest of the tutorial.
from pickle import load
from traceback import format_exc

# NOTE: pickle files must come from a trusted source; reactions.dat ships with the tutorial.
with open('reactions.dat', 'rb') as f:
    reactions = load(f)  # list of ReactionContainer objects

r1 = reactions[0]  # first reaction of the list
cgr2 = ~r1  # result of applying ~ to the reaction; written out below as the CGR example
cgr2.reset_query_marks()
In [ ]:
from CGRtools.files import *  # brings all available readers and writers into scope

# Variant 1: take single records with next(), then read the rest in one call.
with RDFread('example.rdf') as f:
    first = next(f)  # get the first reaction; the reader works as a generator
    data = f.read()  # remaining reactions as a list of ReactionContainers

# Variant 2: plain loop over the reader. Useful for large files.
data = []
with RDFread('example.rdf') as f:
    for r in f:
        data.append(r)

# Variant 3: list comprehension over the reader. Result is equivalent to f.read().
with RDFread('example.rdf') as f:
    data = [r for r in f]
In [ ]:
from pathlib import Path

# OOP style call: a pathlib.Path object is passed instead of a file name string.
rdf_path = Path('example.rdf')
with RDFread(rdf_path) as reader:
    r = next(reader)  # first record of the file
In [ ]:
# OOP style application: an already opened text file is handed to the reader.
with open('example.rdf') as f:
    with RDFread(f) as reader:
        r = next(reader)
Readers are designed to transparently support any type of data source.
The page https://cimm.kpfu.ru/seafile/f/aeaca685e3854ae2bbad/?dl=1 returns an RDF file.
Data sources should be file-like objects.
In [ ]:
from requests import get
from io import StringIO

# `get` returns a response for the requested URL; its `text` attribute holds the body.
# In this example that text is the whole RDF file stored in a single string.
# RDFread does not support parsing of strings, but io.StringIO can emulate a
# file holding the data instead.
# A timeout keeps the cell from hanging forever on an unresponsive server.
response = get('https://cimm.kpfu.ru/seafile/f/aeaca685e3854ae2bbad/?dl=1', timeout=30)
# Fail with a clear HTTP error instead of trying to parse an error page as RDF.
response.raise_for_status()
with StringIO(response.text) as f, RDFread(f) as r:
    r = next(r)
    print(r, 'StringIO downloaded from network data')
# Python supports gzip-compressed data natively. This example works with the
# compressed file directly, without decompressing it to disk first.
from gzip import open as gzip_open

with gzip_open('example.rdf.gz', mode='rt') as f:  # 'rt' opens the archive in text mode
    with RDFread(f) as reader:
        r = next(reader)
print(r, 'gzipped file')
# zip archives are also supported out of the box.
# Archive members can be opened only in binary mode; io.TextIOWrapper is used
# to decode them into text transparently.
from zipfile import ZipFile
from io import TextIOWrapper

with ZipFile('example.zip') as archive:
    with archive.open('example.rdf') as member:
        with TextIOWrapper(member) as f, RDFread(f) as reader:
            r = next(reader)
print(r, 'zip archive')
# Reading a file stored inside a (gzipped) tar archive.
from tarfile import open as tar_open
from io import TextIOWrapper

with tar_open('example.tar.gz') as archive:
    member = archive.extractfile('example.rdf')  # file-like object for the archive member
    with TextIOWrapper(member) as f, RDFread(f) as reader:  # wrap it for text-mode reading
        r = next(reader)
print(r, 'gzipped tar archive')
All files except MRV should be opened in text mode.
MRV requires binary mode: open('/path/to/data.mrv', 'rb')
In [ ]:
# MRV data is read in binary mode. The file gets its own context manager so
# the handle is closed deterministically, rather than relying on the reader
# to close a file object it did not open itself.
with open('example.mrv', 'rb') as mrv_file, MRVread(mrv_file) as f:
    mrv = next(f)  # first record of the MRV file
mrv
Export in following file formats is supported:
Writers have the same API as readers. All writers work with text files.
Writers have a write method, which accepts a single reaction, molecule or CGR object as its argument.
In [ ]:
# Writers support the context manager protocol.
# The file out.rdf will be overwritten.
with RDFwrite('out.rdf') as writer:
    for reaction in data:
        writer.write(reaction)
In [ ]:
# Manual writer management: the writer stays open for ongoing writing into a single file.
f = RDFwrite('out.rdf')
try:
    for r in data:
        f.write(r)
    f.write(r1)
finally:
    # Close the file even if one of the writes fails. Flushes Python writer buffers.
    f.close()
In [ ]:
from CGRtools.files import *
from io import StringIO

# Write the CGR in SDF format into an in-memory text buffer.
with StringIO() as buffer:
    with SDFwrite(buffer) as w:
        w.write(cgr2)
        mdl = buffer.getvalue()  # take the formatted text while the buffer is still open
print(mdl)  # This is how a CGR looks.
# Notice that most of the fields are conventional MOL fields; S-queries are used for dynamic bond and atom specification
In [ ]:
# Import the SDF text containing the CGR back from an in-memory buffer.
with StringIO(mdl) as buffer:
    with SDFread(buffer) as reader:
        cgr3 = next(reader)
print(cgr3)
print(type(cgr3))
In [ ]:
from pickle import loads, dumps
In [ ]:
# Pickle round trip: dump the reaction, then load it back from the dump.
# (pickle data should only ever be loaded from trusted sources)
dump = dumps(r1)
loads(dump)